{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ed6ee437",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import shutil\n",
    "import random\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0b9c94ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 指定数据集路径\n",
    "dataset_path = r'C:\\Users\\24566\\Desktop\\cv\\fruit81_full'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "487d72a9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "数据集 C:\\Users\\24566\\Desktop\\cv\\fruit81\n"
     ]
    }
   ],
   "source": [
    "dataset_name = dataset_path.split('_')[0]\n",
    "print('数据集', dataset_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "d9aa56f3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['人参果',\n",
       " '佛手瓜',\n",
       " '哈密瓜',\n",
       " '圣女果',\n",
       " '山楂',\n",
       " '山竹',\n",
       " '无花果',\n",
       " '木瓜',\n",
       " '李子',\n",
       " '杏',\n",
       " '杨桃',\n",
       " '杨梅',\n",
       " '枇杷',\n",
       " '枣',\n",
       " '柚子',\n",
       " '柠檬',\n",
       " '柿子',\n",
       " '树莓',\n",
       " '桂圆',\n",
       " '桑葚',\n",
       " '梨',\n",
       " '椰子',\n",
       " '榴莲',\n",
       " '樱桃',\n",
       " '橘子',\n",
       " '毛丹',\n",
       " '水蜜桃',\n",
       " '沃柑',\n",
       " '沙果',\n",
       " '沙棘',\n",
       " '油桃',\n",
       " '牛油果',\n",
       " '猕猴桃',\n",
       " '甘蔗',\n",
       " '甜瓜-伊丽莎白',\n",
       " '甜瓜-白',\n",
       " '甜瓜-绿',\n",
       " '甜瓜-金',\n",
       " '番石榴-百',\n",
       " '番石榴-红',\n",
       " '白兰瓜',\n",
       " '白心火龙果',\n",
       " '白萝卜',\n",
       " '百香果',\n",
       " '石榴',\n",
       " '砂糖橘',\n",
       " '粑粑柑',\n",
       " '红心火龙果',\n",
       " '红苹果',\n",
       " '羊奶果',\n",
       " '羊角蜜',\n",
       " '胡萝卜',\n",
       " '脐橙',\n",
       " '腰果',\n",
       " '芒果',\n",
       " '芦柑',\n",
       " '草莓',\n",
       " '荔枝',\n",
       " '莲雾',\n",
       " '菠萝',\n",
       " '菠萝莓',\n",
       " '菠萝蜜',\n",
       " '葡萄-白',\n",
       " '葡萄-红',\n",
       " '蓝莓',\n",
       " '蛇皮果',\n",
       " '蟠桃',\n",
       " '血橙',\n",
       " '西柚',\n",
       " '西梅',\n",
       " '西瓜',\n",
       " '西红柿',\n",
       " '车厘子',\n",
       " '酸角',\n",
       " '金桔',\n",
       " '青柠',\n",
       " '青苹果',\n",
       " '香橼',\n",
       " '香蕉',\n",
       " '黄桃',\n",
       " '黑莓']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "classes = os.listdir(dataset_path)\n",
    "classes\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "77ba96fa",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "81"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(classes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "0519afe2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 创建 train 文件夹\n",
    "os.mkdir(os.path.join(dataset_path, 'train'))\n",
    "\n",
    "# 创建 test 文件夹\n",
    "os.mkdir(os.path.join(dataset_path, 'val'))\n",
    "\n",
    "# 在 train 和 test 文件夹中创建各类别子文件夹\n",
    "for fruit in classes:\n",
    "    os.mkdir(os.path.join(dataset_path, 'train', fruit))\n",
    "    os.mkdir(os.path.join(dataset_path, 'val', fruit))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "b6ee78dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_frac = 0.2  # 测试集比例\n",
    "random.seed(123) # 随机数种子，便于复现"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "55ad8d49",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "        类别              训练集数据个数            测试集数据个数      \n",
      "       人参果                146                 36        \n",
      "       佛手瓜                129                 32        \n",
      "       哈密瓜                157                 39        \n",
      "       圣女果                158                 39        \n",
      "        山楂                159                 39        \n",
      "        山竹                152                 38        \n",
      "       无花果                156                 39        \n",
      "        木瓜                156                 38        \n",
      "        李子                154                 38        \n",
      "        杏                 158                 39        \n",
      "        杨桃                157                 39        \n",
      "        杨梅                153                 38        \n",
      "        枇杷                151                 37        \n",
      "        枣                 156                 38        \n",
      "        柚子                156                 39        \n",
      "        柠檬                122                 30        \n",
      "        柿子                154                 38        \n",
      "        树莓                149                 37        \n",
      "        桂圆                159                 39        \n",
      "        桑葚                156                 39        \n",
      "        梨                 155                 38        \n",
      "        椰子                159                 39        \n",
      "        榴莲                159                 39        \n",
      "        樱桃                133                 33        \n",
      "        橘子                145                 36        \n",
      "        毛丹                127                 31        \n",
      "       水蜜桃                141                 35        \n",
      "        沃柑                159                 39        \n",
      "        沙果                153                 38        \n",
      "        沙棘                147                 36        \n",
      "        油桃                160                 39        \n",
      "       牛油果                120                 30        \n",
      "       猕猴桃                158                 39        \n",
      "        甘蔗                158                 39        \n",
      "     甜瓜-伊丽莎白               75                 18        \n",
      "       甜瓜-白                68                 17        \n",
      "       甜瓜-绿                35                 8         \n",
      "       甜瓜-金                42                 10        \n",
      "      番石榴-百               105                 26        \n",
      "      番石榴-红               121                 30        \n",
      "       白兰瓜                103                 25        \n",
      "      白心火龙果               148                 37        \n",
      "       白萝卜                160                 39        \n",
      "       百香果                151                 37        \n",
      "        石榴                153                 38        \n",
      "       砂糖橘                148                 36        \n",
      "       粑粑柑                154                 38        \n",
      "      红心火龙果               159                 39        \n",
      "       红苹果                142                 35        \n",
      "       羊奶果                156                 39        \n",
      "       羊角蜜                157                 39        \n",
      "       胡萝卜                149                 37        \n",
      "        脐橙                154                 38        \n",
      "        腰果                160                 40        \n",
      "        芒果                139                 34        \n",
      "        芦柑                146                 36        \n",
      "        草莓                159                 39        \n",
      "        荔枝                158                 39        \n",
      "        莲雾                156                 39        \n",
      "        菠萝                158                 39        \n",
      "       菠萝莓                 91                 22        \n",
      "       菠萝蜜                160                 39        \n",
      "       葡萄-白               125                 31        \n",
      "       葡萄-红               160                 39        \n",
      "        蓝莓                158                 39        \n",
      "       蛇皮果                138                 34        \n",
      "        蟠桃                145                 36        \n",
      "        血橙                150                 37        \n",
      "        西柚                147                 36        \n",
      "        西梅                158                 39        \n",
      "        西瓜                156                 38        \n",
      "       西红柿                150                 37        \n",
      "       车厘子                136                 33        \n",
      "        酸角                153                 38        \n",
      "        金桔                145                 36        \n",
      "        青柠                119                 29        \n",
      "       青苹果                156                 39        \n",
      "        香橼                104                 25        \n",
      "        香蕉                155                 38        \n",
      "        黄桃                155                 38        \n",
      "        黑莓                150                 37        \n"
     ]
    }
   ],
   "source": [
    "df = pd.DataFrame()\n",
    "\n",
    "print('{:^18} {:^18} {:^18}'.format('类别', '训练集数据个数', '测试集数据个数'))\n",
    "\n",
    "for fruit in classes: # 遍历每个类别\n",
    "\n",
    "    # 读取该类别的所有图像文件名\n",
    "    old_dir = os.path.join(dataset_path, fruit)\n",
    "    images_filename = os.listdir(old_dir)\n",
    "    random.shuffle(images_filename) # 随机打乱\n",
    "\n",
    "    # 划分训练集和测试集\n",
    "    testset_numer = int(len(images_filename) * test_frac) # 测试集图像个数\n",
    "    testset_images = images_filename[:testset_numer]      # 获取拟移动至 test 目录的测试集图像文件名\n",
    "    trainset_images = images_filename[testset_numer:]     # 获取拟移动至 train 目录的训练集图像文件名\n",
    "\n",
    "    # 移动图像至 test 目录\n",
    "    for image in testset_images:\n",
    "        old_img_path = os.path.join(dataset_path, fruit, image)         # 获取原始文件路径\n",
    "        new_test_path = os.path.join(dataset_path, 'val', fruit, image) # 获取 test 目录的新文件路径\n",
    "        shutil.move(old_img_path, new_test_path) # 移动文件\n",
    "\n",
    "    # 移动图像至 train 目录\n",
    "    for image in trainset_images:\n",
    "        old_img_path = os.path.join(dataset_path, fruit, image)           # 获取原始文件路径\n",
    "        new_train_path = os.path.join(dataset_path, 'train', fruit, image) # 获取 train 目录的新文件路径\n",
    "        shutil.move(old_img_path, new_train_path) # 移动文件\n",
    "    \n",
    "    # 删除旧文件夹\n",
    "    assert len(os.listdir(old_dir)) == 0 # 确保旧文件夹中的所有图像都被移动走\n",
    "    shutil.rmtree(old_dir) # 删除文件夹\n",
    "    \n",
    "    # 工整地输出每一类别的数据个数\n",
    "    print('{:^18} {:^18} {:^18}'.format(fruit, len(trainset_images), len(testset_images)))\n",
    "    \n",
    "    # 保存到表格中\n",
    "    df = pd.concat([df, pd.DataFrame({'类别':fruit,'训练集数量': [len(trainset_images)], '测试集数量': [len(testset_images)]})], ignore_index=True)\n",
    "# 重命名数据集文件夹\n",
    "shutil.move(dataset_path, dataset_name+'_split')\n",
    "\n",
    "# 数据集各类别数量统计表格，导出为 csv 文件\n",
    "df['数量'] = df['训练集数量'] + df['测试集数量']\n",
    "df.to_csv('数据量统计.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "66659fb7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>类别</th>\n",
       "      <th>训练集数量</th>\n",
       "      <th>测试集数量</th>\n",
       "      <th>数量</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>人参果</td>\n",
       "      <td>146</td>\n",
       "      <td>36</td>\n",
       "      <td>182</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>佛手瓜</td>\n",
       "      <td>129</td>\n",
       "      <td>32</td>\n",
       "      <td>161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>哈密瓜</td>\n",
       "      <td>157</td>\n",
       "      <td>39</td>\n",
       "      <td>196</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>圣女果</td>\n",
       "      <td>158</td>\n",
       "      <td>39</td>\n",
       "      <td>197</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>山楂</td>\n",
       "      <td>159</td>\n",
       "      <td>39</td>\n",
       "      <td>198</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>76</th>\n",
       "      <td>青苹果</td>\n",
       "      <td>156</td>\n",
       "      <td>39</td>\n",
       "      <td>195</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>77</th>\n",
       "      <td>香橼</td>\n",
       "      <td>104</td>\n",
       "      <td>25</td>\n",
       "      <td>129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>78</th>\n",
       "      <td>香蕉</td>\n",
       "      <td>155</td>\n",
       "      <td>38</td>\n",
       "      <td>193</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79</th>\n",
       "      <td>黄桃</td>\n",
       "      <td>155</td>\n",
       "      <td>38</td>\n",
       "      <td>193</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>80</th>\n",
       "      <td>黑莓</td>\n",
       "      <td>150</td>\n",
       "      <td>37</td>\n",
       "      <td>187</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>81 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     类别  训练集数量  测试集数量   数量\n",
       "0   人参果    146     36  182\n",
       "1   佛手瓜    129     32  161\n",
       "2   哈密瓜    157     39  196\n",
       "3   圣女果    158     39  197\n",
       "4    山楂    159     39  198\n",
       "..  ...    ...    ...  ...\n",
       "76  青苹果    156     39  195\n",
       "77   香橼    104     25  129\n",
       "78   香蕉    155     38  193\n",
       "79   黄桃    155     38  193\n",
       "80   黑莓    150     37  187\n",
       "\n",
       "[81 rows x 4 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
